clear all
set more off
set mem 10000000
set maxvar 120000
set matsize 10000
version 15

*************************************************************************************
*** Script to compile inputs for consumer-surplus-based cost-benefit calculations ***
*************************************************************************************

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

****************************************************************** 
****************************************************************** 

	// Set assumed parameter values
numlist "0(0.005)1"
global disc = "`r(numlist)'" // range of discount rates
global pop = "300 1000 2000" // considering villages of 3 population sizes
global elast = "0.56 0.58 0.62" // from Mahadevan (2020); Burgess et al. (2020a)
global hh_size = "5" // average household size from the Census of India
global exp_growth = "0.03" // ad hoc, following Lee, Miguel, and Wolfram (2020b)
global kwh_growth = "0.03" // ad hoc, following Lee, Miguel, and Wolfram (2020b)

***

	// Bring in assumed incrases from NSS regression results
{
use "$results/nss_reg_results.dta", clear

	// kWh scale-up, pooled
sum beta if regs=="ols" & yvar=="elec_quantity" & ytag=="" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local num = r(mean)

sum beta if regs=="ols" & yvar=="elec_q_yn" & ytag=="" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local denom = r(mean)

global kwh_pooled = `num'/`denom'


	// kWh scale-up, quintile 1
sum beta if regs=="ols" & yvar=="elec_quantity" & ytag=="_dec12" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local num = r(mean)

sum beta if regs=="ols" & yvar=="elec_q_yn" & ytag=="_dec12" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local denom = r(mean)

global kwh_q1 = `num'/`denom'


	// kWh scale-up, quintiles 2-5
sum beta if regs=="ols" & yvar=="elec_quantity" & ytag=="_dec30" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local num = r(mean)

sum beta if regs=="ols" & yvar=="elec_q_yn" & ytag=="_dec30" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local denom = r(mean)

global kwh_q25 = `num'/`denom'


	// kWh scale-up, pooled (high hours)
sum beta_hrs_hi if regs=="ols (HRS interaction)" & yvar=="elec_quantity" & ytag=="" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local num = r(mean)

sum beta_hrs_hi if regs=="ols (HRS interaction)" & yvar=="elec_q_yn" & ytag=="" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local denom = r(mean)

global kwh_pooled_hrs = `num'/`denom'


	// kWh scale-up, quintile 1 (high hours)
sum beta_hrs_hi if regs=="ols (HRS interaction)" & yvar=="elec_quantity" & ytag=="_dec12" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local num = r(mean)

sum beta_hrs_hi if regs=="ols (HRS interaction)" & yvar=="elec_q_yn" & ytag=="_dec12" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local denom = r(mean)

global kwh_q1_hrs = `num'/`denom'


	// kWh scale-up, quintiles 2-5 (high hours)
sum beta_hrs_hi if regs=="ols (HRS interaction)" & yvar=="elec_quantity" & ytag=="_dec30" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local num = r(mean)

sum beta_hrs_hi if regs=="ols (HRS interaction)" & yvar=="elec_q_yn" & ytag=="_dec30" & ifs=="" ///
	& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
assert r(N)==1
local denom = r(mean)

global kwh_q25_hrs = `num'/`denom'


	// kWh scale-up, pooling village sizes, split by expenditure quartile
foreach exp in 1 2 3 4 {
	
	sum beta if regs=="ols, uncollapsed" & yvar=="elec_quantity" & ytag=="" & ifs==" if exp_quartile==`exp'" ///
		& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
	assert r(N)==1
	local num = r(mean)

	sum beta if regs=="ols, uncollapsed" & yvar=="elec_q_yn" & ytag==""  & ifs==" if exp_quartile==`exp'" ///
		& fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt"
	assert r(N)==1
	local denom = r(mean)

	global kwh_exp`exp' = `num'/`denom'
	
}	

}

***

	// Extract summary stats from uncollapsed NSS panel
{	
use "$panel/panel_dataset_dd_nss_uncollapsed.dta", clear

	// make weights representative at the national level (rather than district level)
gen weight_pop = weight_normalized * tot_p_dist
	
	// mean kWh per month
sum elec_quantity if year==2010 & elec_q_yn>0 [aw=weight_pop], detail
global kwh_mean = r(mean)

	// mean electricity price (in 2010 rupees)
assert elec_price==. if elec_q_yn==0
egen p_med_vill = median(elec_price) if year==2010 & elec_q_yn>0, by(fsu_serial_no)	
egen count_vill = count(stdt) if year==2010 & elec_q_yn>0, by(fsu_serial_no)
sum elec_price if year==2010 & elec_q_yn>0, detail
sum p_med_vill if year==2010 & elec_q_yn>0, detail
sum elec_price if year==2010 & elec_q_yn>0 [aw=weight_pop], detail
sum p_med_vill if year==2010 & elec_q_yn>0 [aw=weight_pop], detail
	// medians are very similar
sum elec_price if year==2010 & elec_q_yn>0 [aw=weight_pop], detail
global kwh_pmean = r(p50)

	// make expenditure quartiles
sum mth_pc_expE1 [aw=weight_pop], detail
gen exp_quartile = .
replace exp_quartile = 1 if mth_pc_expE1<=r(p25)
replace exp_quartile = 2 if mth_pc_expE1<=r(p50) & exp_quartile==.
replace exp_quartile = 3 if mth_pc_expE1<=r(p75) & exp_quartile==.
replace exp_quartile = 4 if exp_quartile==.
tab exp_quartile, gen(expQ)


	// shares by expenditure quartiles, by village size
foreach y in 2005 2010 {
	foreach pop in $pop {
		foreach exp in 1 2 3 4 {
			sum expQ`exp' [aw=weight_pop] if inrange(frame_population,`pop'-50,`pop'+50) & year==`y'
			global share_exp`exp'_`pop'_y`y' = r(mean)
		}
	}
}	

	// construct kWh using expenditure-specific estimates and village size specific shares
foreach pop in $pop {
	global kwh_`pop'_exp_shares = $kwh_exp1 * ${share_exp1_`pop'_y2010} + $kwh_exp2 * ${share_exp2_`pop'_y2010} ///
								+ $kwh_exp3 * ${share_exp3_`pop'_y2010} + $kwh_exp4 * ${share_exp4_`pop'_y2010}
}

	// construct kWh from just summary stats by village size
foreach y in 2005 2010 {
	foreach pop in $pop {
		sum elec_quantity [aw=weight_pop] if elec_q_yn>0 & inrange(frame_population,`pop'-50,`pop'+50) & year==`y'
		global kwh_mean_`pop'_y`y' = r(mean)
	}
}	

	// expenditure growth rates, as annualized percentage point increases in the share of households in each quartile
foreach pop in $pop {
	foreach exp in 1 2 3 4 {
		global delta_exp`exp'_`pop' = (${share_exp`exp'_`pop'_y2010} - ${share_exp`exp'_`pop'_y2005})/5
	}
	assert abs(${delta_exp1_`pop'} + ${delta_exp2_`pop'} + ${delta_exp3_`pop'} + ${delta_exp4_`pop'})<1e-10
}
	
	// impose ad hoc 3% expenditure growth, create hypothetical 2011 dummies for expenditure quartiles (w/o quartile cut points)
gen exp_2011 = mth_pc_expE1*(1 + $exp_growth )
sum mth_pc_expE1 [aw=weight_pop], detail
gen exp_2011_quartile = .
replace exp_2011_quartile = 1 if year==2010 & exp_2011<=r(p25)
replace exp_2011_quartile = 2 if year==2010 & exp_2011<=r(p50) & exp_2011_quartile==.
replace exp_2011_quartile = 3 if year==2010 & exp_2011<=r(p75) & exp_2011_quartile==.
replace exp_2011_quartile = 4 if year==2010 & exp_2011_quartile==.
tab exp_2011_quartile if year==2010, gen(exp2011Q)

	// shares by hypothetical 2011 expenditure quartiles, by village size
foreach pop in $pop {
	foreach exp in 1 2 3 4 {
		sum exp2011Q`exp' [aw=weight_pop] if inrange(frame_population,`pop'-50,`pop'+50) & year==2010
		global share_exp`exp'_`pop'_y2011 = r(mean)
	}
}	

	// 3% annualized expenditure growth rate, as percentage point increases in the share of households in each quartile
foreach pop in $pop {
	foreach exp in 1 2 3 4 {
		global delta_exp`exp'_`pop'_3pct = ${share_exp`exp'_`pop'_y2011} - ${share_exp`exp'_`pop'_y2010}
	}
}

	// kWh growth rates, annualized from summary stats within each village size
foreach pop in $pop {
	global delta_kwh_`pop' = max((${kwh_mean_`pop'_y2010}/${kwh_mean_`pop'_y2005})^(1/5)-1,0) // don't allow for negative kWh growth
}

}

***	

	// Extract population growth rates from Census panel
{
use "$panel/panel_dataset_no_lights.dta", clear
	
// sum vdp_pwr_h_dom_avg_11 if vd_pwr_d_dom_11==1 & inrange(tot_p11,250,350) 
// sum vdp_pwr_h_dom_avg_11 if vd_pwr_d_dom_11==1 & inrange(tot_p11,950,1050)	
// sum vdp_pwr_h_dom_avg_11 if vd_pwr_d_dom_11==1 & inrange(tot_p11,1950,2050)		

gen p_growth = (tot_p11/tot_p)^(1/10)-1
foreach pop in $pop {
	sum p_growth if inrange(tot_p11,`pop'-50,`pop'+50), detail	
	global p_growth_`pop' = r(p50)
}
}	


	// Set up parameter space for NPV calcs
clear
set obs 100000

gen disc = .
gen pop = .
gen elast = .
gen hh_size = .
gen kwh_var = ""
gen kwh_mean = .
gen elec_price = .
gen exp_growth = ""
gen kwh_growth_scenario = ""
gen kwh_growth = .
gen pop_growth = .
la var disc "Assumed annual discount rate"
la var pop "Assumed village population (in year 1, the year of electrification)"	
la var elast "Assumed price electricity of households' elasticity demand"
la var hh_size "Assumed household size"
la var kwh_var "Scenario used to determine kWh consumed per electrified household per month"
la var kwh_mean "Assumed value of kWh consumed per electrified household per month"
la var elec_price "Assumed electricity price faced by households (Rs/kWh)"
la var exp_growth "Expenditure growth scenario (only relevant for kwh_var=pop_exp_shares"
la var kwh_growth_scenario "Electricity consumption growth scenario"
la var kwh_growth "Assumed annual growth rate in electricity consumption per electrified household"
la var pop_growth "Assumed annual population growth rate"

	// create one row per scenario
local row = 1
foreach disc in $disc {
	foreach pop in $pop {
		foreach elast in $elast {
			foreach hh_size in $hh_size {
				foreach kwh in q1 q25 q1_hrs q25_hrs pop_exp_shares mean_pop { // removed {pooled, pooled_hrs, mean}, since these don't differentiate by village size
					foreach exp_growth in none nss adhoc {
						foreach kwh_growth in 0 nss $kwh_growth {
							foreach pop_growth in 0 census {
								
								if !(`pop'==2000 & inlist("`kwh'","q1","q1_hrs")) & ///
								   !(inlist(`pop',300,1000) & inlist("`kwh'","q25","q25_hrs")) & ///
								   !("`exp_growth'"!="none" & "`kwh'"!="pop_exp_shares") {
								
									replace disc = `disc' if _n==`row'
									replace pop = `pop' if _n==`row'
									replace elast = `elast' if _n==`row'
									replace hh_size = `hh_size' if _n==`row'
									replace kwh_var = "`kwh'" if _n==`row'
									if "`kwh'"=="pop_exp_shares" {
										replace kwh_mean = ${kwh_`pop'_exp_shares} if _n==`row'
									}
									else if "`kwh'"=="mean_pop" {
										replace kwh_mean = ${kwh_mean_`pop'_y2010} if _n==`row'
									}
									else {
										replace kwh_mean = ${kwh_`kwh'} if _n==`row'
									}	
									replace elec_price = ${kwh_pmean} if _n==`row'
									replace exp_growth = "`exp_growth'" if _n==`row'
									replace kwh_growth_scenario = "`kwh_growth'" if _n==`row'
									if "`kwh_growth'"=="nss" {
										replace kwh_growth = ${delta_kwh_`pop'} if _n==`row'
									}
									else {
										replace kwh_growth = `kwh_growth' if _n==`row'
									}
									if "`pop_growth'"=="census" {
										replace pop_growth = ${p_growth_`pop'} if _n==`row'
									}
									else {
										replace pop_growth = `pop_growth' if _n==`row'
									}
									local row = `row' + 1	
									assert `row'<_N	
									
								}
							}
						}
					}
				}
			}
		}
	}
}
dropmiss, obs force	


	// expand to 20 years per scenario
gen scenario = _n
order scenario	
expand 20, gen(temp_new)
sort scenario temp_new
gen t = 1 if temp_new==0
replace t = t[_n-1] + 1 if temp_new==1 & scenario==scenario[_n-1]
assert inrange(t,1,20)
drop temp_new
la var scenario "ROI scenario (unique set of parameters and assumptions)"	
la var t "Year, where t=1 is the undiscounted year of electrification"	

	// populate Q electrcity for expenditure share scenarios
gen qbase = kwh_mean if exp_growth=="none"	
replace qbase = kwh_mean if exp_growth!="none" & t==1
	// crate empty share variables
foreach exp in 1 2 3 4 {
	gen s`exp' = .
	la var s`exp' "Share of HH in expenditure quartile `exp' in year t"
}
foreach pop in $pop {
	foreach exp in 1 2 3 4 {
		// populate share variables with annual changes
		replace s`exp' = ${share_exp`exp'_`pop'_y2010} + (${delta_exp`exp'_`pop'} * (t-1)) ///
			if pop==`pop' & exp_growth=="nss" 
		replace s`exp' = ${share_exp`exp'_`pop'_y2010} + (${delta_exp`exp'_`pop'_3pct} * (t-1)) ///
			if pop==`pop' & exp_growth=="adhoc" 
	}
}
	// adjust shares to be between 0 and 1
assert inrange(s4,0,1) if inlist(exp_growth,"nss","adhoc")
assert inrange(s3,0,1) if inlist(exp_growth,"nss","adhoc")
replace s2 = s2 + s1 if s1<0
replace s1 = 0 if s1<0
replace s3 = s3 + s2 if s2<0
replace s2 = 0 if s2<0
foreach exp in 1 2 3 4 {
	assert inrange(s`exp',0,1) if inlist(exp_growth,"nss","adhoc")
}
assert abs((s1+s2+s3+s4)-1)<0.000001 if inlist(exp_growth,"nss","adhoc")
	// construct Q from shares
replace qbase = ${kwh_exp1}*s1 + ${kwh_exp2}*s2 + ${kwh_exp3}*s3 + ${kwh_exp4}*s4 ///
	if inlist(exp_growth,"nss","adhoc") & qbase==.
assert qbase!=.
la var qbase "Assumed kWh/HH for each year, + expenditure growth"	
	
	
	// populate Q electricity accounting for annual consumption growth
gen q = qbase*((1+kwh_growth)^(t-1))
la var q "Assumed kWh/HH for each year, + expenditure growth + electrricity consumption growth"
	
	// construct number of households per village, accounting for population growth
gen n_hh = pop/hh_size*((1+pop_growth)^(t-1))
la var n_hh "Assumed number of households per village, + population growth"

	// calculate CS triangles, assuming linear demand
gen yintercept = elec_price - ((1/-elast)*(elec_price/q))*q
gen height = yintercept - elec_price
gen width = q
gen area = height*width/2
la var yintercept "Y intercept of assumed linear demand curve"
la var height "Height of consumer surplus triangle"
la var width "Width of consumer surplus triangle"
la var area "Area of consumer surplus triangle (Rs per month per household)"
	
	// multiply by 12 to get annualized consumer surplus
gen cs_hh = area*12
la var cs_hh "Total consumer surplus per household in year t (Rs, undiscounted)"

	// sum across all households in village
gen cs_vill = cs_hh * n_hh	
la var cs_vill "Total consumer surplus per village in year t (Rs, undiscounted)"

	// apply annual discount factor
gen cs_vill_disc = cs_vill/((1+disc)^(t-1))
la var cs_vill_disc "Discounted counsumer surplus per village in year t (Rs)"

	// NPV of benefits per household
egen pdv_cs_vill = sum(cs_vill_disc), by(scenario)
la var pdv_cs_vill "Present discounted value of consumer surplus per village (Rs)"

	// Grab 2008-to-2010 rupee conversion rate
preserve 
import excel "$nss/INDCPIALLAINMEI.xls", sheet("FRED Graph") cellrange(A11:B33) firstrow clear
gen year = year(observation_date)
sum IND if year==2010
local cpi2010 = r(mean)
gen INDdef = IND/`cpi2010'
sum INDdef if year==2008
local cpi2008 = r(mean)
restore
	
	// Estimates of per-household costs (based on 2200 Rs per BPL household, in 2008 rupees)
	// https://eparlib.nic.in/bitstream/123456789/62767/1/14_Energy_31.pdf
gen hh_costs = pop/hh_size * 2200 / `cpi2008' // using the number of households in year 1
la var hh_costs "Assumed upfront per-household costs of electrification"	

	// Fixed costs for full electrification (from World Bank report, Table 5.1, p51)
	// Also here: https://eparlib.nic.in/bitstream/123456789/62767/1/14_Energy_31.pdf
gen FC_lo = 1300000 / `cpi2008'
gen FC_hi = 1800000 / `cpi2008'
la var FC_lo "Assumed upfront fixed cost of electrication (low)"
la var FC_hi "Assumed upfront fixed cost of electrication (high)"

	// Total NPV
gen NPV_lo = pdv_cs_vill - hh_costs - FC_lo
gen NPV_hi = pdv_cs_vill - hh_costs - FC_hi
la var NPV_lo "Net present value of electrication, with low fixed costs" 
la var NPV_hi "Net present value of electrication, with high fixed costs" 

	// ROI
gen ROI_lo = NPV_lo / (hh_costs + FC_lo)
gen ROI_hi = NPV_hi / (hh_costs + FC_hi)
la var ROI_lo "Return on investment from electrification, with low fixed costs"
la var ROI_hi "Return on investment from electrification, with high fixed costs"

br

	// Save full dataset (all years)
compress
save "$results/roi_results_cs_full.dta", replace


	// Collapse to 1 row per scenario, and save	
use "$results/roi_results_cs_full.dta", clear
keep if t==1
drop qbase q t s? n_hh yintercept height width area cs_hh cs_vill cs_vill_disc
unique scenario
assert r(unique)==r(N)
compress
save "$results/roi_results_cs.dta", replace


	// Calculate IRRs for each combination of parameters (except discount rate), and save
use "$results/roi_results_cs.dta", clear	
egen temp_lo = max(disc) if ROI_lo>0, by(pop-pop_growth)
egen temp_hi = max(disc) if ROI_hi>0, by(pop-pop_growth)
egen irr_lo = mean(temp_lo), by(pop-pop_growth)
egen irr_hi = mean(temp_hi), by(pop-pop_growth)
replace irr_lo = -.01 if irr_lo==.
replace irr_hi = -.01 if irr_hi==.
keep pop-pop_growth irr_lo irr_hi
duplicates drop
tab irr_lo if pop==300
tab irr_hi if pop==300
tab irr_lo if pop==1000
tab irr_hi if pop==1000
tab irr_lo if pop==2000
tab irr_hi if pop==2000
keep if round(elast,0.01)==0.62
reshape long irr_, i(pop-pop_growth) j(fc) string
rename irr_ irr
la var fc "Assumed upfront fixed costs of electrification (high vs. low)"
la var irr "Internal rate of return"
unique pop-pop_growth fc
assert r(unique)==r(N)
compress
save "$results/roi_results_cs_irr.dta", replace
	
	
****************************************************************** 
****************************************************************** 
